#Author: Linda Sommerfeld
#Date: January 2023
#Related manuscript: Visual Context Affects Children’s and Adults’ Cognitive Load Engaged in Predictive Language Processing
#Type of data: Pupil size data that are already preprocessed for analysis




################################################################################################################################################################################################
### OVERVIEW ###################################################################################################################################################################################
################################################################################################################################################################################################

### PREPARING 
# Remove observations for which comprehension question was answered incorrectly
# Bring variables into correct format for analysis

### ANALYSIS
# Contrast coding (factors condition & age group)
# Control model on baseline pupil size values
# Contrast coding (factor region of interest)
# Run linear mixed effect models (LMERs) 
# Model with all factors: proportional pupil size ~ condition * region * age group
# Model splits by age group, by region, and by age group AND region

### PLOT DATA ACROSS SENTENCE COURSE

### DESCREPTIVE INFORMATION 
# Age, gender, psychometric tests




################################################################################################################################################################################################
### LOAD LIBRARIES, SET WORKING DIRECTION, LOAD DATA ###########################################################################################################################################
################################################################################################################################################################################################

library(lme4)
library(ggplot2)
library(psych)
library(lattice)
library(influence.ME)
library(plyr)
library(dplyr)
library(tidyr)
library(tibble)
library(readr)
library(anytime)
library(languageR)
library(stringr)
library(Rmisc)
library(psych)
library(lmerTest)
library(reshape)
library(ez)
options(scipen =999)

setwd("")
d <- read.csv2("pupil_for publication_preprocessed.csv")




####################################
### EXPLANATION OF THE DATA FILE ###
####################################

colnames(d)

#group =      age group --> Children & Adults
#subject =    experimental subject ID
#item =       number of item (range = 1 - 32)
#condition =  visual condition --> 0-, 1-, 3-, or 4-consistent
#trial_nr  =  number of the trial

#position =   region of interest in the sentence (e.g., verb region, noun region, ...) in which the current data were obtained
#pup_base =   raw pupil size in baseline
#pup_prop =   proportional pupil size in respective region relative to the baseline

#accu =       accuracy (1 = correct, 0 = incorrect) of the respective subject for the comprehension question of the respective item
#age =        age of participant
#gender =     gender of participant (1 = male, 2 = female)
#ppvt =       score in the Peabody Picture Vocabulary Test
#svft =       score in the Semantic Verbal Fluency Task
#cnt =        score in the Color Naming Task




################################################################################################################################################################################################
### PREPARING ##################################################################################################################################################################################
################################################################################################################################################################################################

#remove items for which comprehension question was answered incorrectly
d <- subset(d, accu == "1")

#some formating of relevant variables
d$pup_prop <- as.numeric(d$pup_prop)
d$condition <- as.factor(d$condition)
d$position <- as.factor(d$position)
d$group <- as.factor(d$group)




################################################################################################################################################################################################
### ANALYSIS ###################################################################################################################################################################################
################################################################################################################################################################################################

########################################################
### CONTRAST CODING (FACTORS: CONDITION & AGE GROUP) ###
########################################################

#CONDITION CONTRAST (v1 = 1:0, v2 = 1:3+4; v3 = 3:4)
d <- mutate(d, v1 = ifelse(condition == "3", 0, ifelse(condition == "4", 0, ifelse(condition == "1", 1/2, -1/2))),
               v2 = ifelse(condition == "0", 0, ifelse(condition == "1", 1/3, -2/3)),
               v3 = ifelse(condition == "0", 0, ifelse(condition == "1", 0, ifelse(condition == "3", 1/2, -1/2))))

#AGE GROUP CONTRAST (a = adults:children)
d <- mutate(d, a = ifelse(group == "Adults", 1/2, -1/2))




##############################
### BASELINE CONTROL MODEL ###
##############################

#generate subset with pupil size in baseline region only
b <- subset(d, position == "base")
#format pup_base (= pupil size in baseline) as numeric
b$pup_base <- as.numeric(b$pup_base)

#run baseline model
basem <- lmer(pup_base ~ (v1+v2+v3) * a  +
                (1 + (v1+v3) || subject)+
                (1 +  a   || item),
              data = b)
summary(basem)
#confint(basem)
rm(b, basem)

#no condition differences in baseline, only an age group difference (larger baseline in kids vs adults)
#thus, we do not include this region in further analyses and remove it from the data set
d <- subset(d, position != "base")




###################################################
### CONTRAST CODING (FACTOR REGION OF INTEREST) ###
###################################################

#REGION CONTRAST (t1 = verb:spill-over(gleich), t2 = spill-over(gleich):noun, t3 = noun:postview)
d <- mutate(d, t1 = ifelse(position == "post", 0, ifelse(position == "noun", 0, ifelse(position== "verb", 1/2, -1/2))), #pre vs. verb
               t2 = ifelse(position == "positiont", 0, ifelse(position == "verb", 0, ifelse(position== "gleich", 1/2, -1/2))), 
               t3 = ifelse(position == "verb", 0, ifelse(position == "gleich", 0, ifelse(position== "noun", 1/2, -1/2)))) #verb vs. noun




############################################################
### "MAIN" MODEL: PUPIL ~ CONDITION * REGION * AGE GROUP ###
############################################################

model <- lmer(pup_prop ~ (v1+v2+v3) * (t1+t2+t3) * a  +
                (1 + (v1+v2+v3) ||  subject)+
                (1 + (v1+v3) * a  || item),
              data = d)
summary(model)
#confint(model)
rm(model)




#############################
### MODEL SPLIT BY REGION ###
#############################

#create subsets for noun & postview region
datnoun <- subset(d, position == "noun")
datpost <- subset(d, position == "post")

#run model on noun region data
modnoun <- lmer(pup_prop ~ (v1+v2+v3) * a  +
               (1 + (v1+v2+v3)   || subject),
              # (1 | item),
                data = datnoun)
summary(modnoun)
#confint(modnoun)

#run model on postview region data
modpost <- lmer(pup_prop ~ (v1+v2+v3) * a  +
                  (1 + (v1+v2+v3)   || subject)+
                  (1 | item),
                   data = datpost)
summary(modpost)
#confint(modpost)
rm(modnoun, modpost, datnoun, datpost)




#################################
### MODEL SPLITS BY AGE GROUP ###
#################################

###
### ADULTS
###

#create subsets with adult data only 
adu <- subset(d, group == "Adults") #adults
adun <- subset(adu, position == "noun")  #adults, noun region
adup <- subset(adu, position == "post")  #adults, postview region

#model for adults with all factors: pupil adults ~ condition * region
madu <- lmer(pup_prop ~ (v1+v2+v3) * (t1+t2+t3)  +
                (1 + (v1+v2+v3)   || subject)+
                (1 + (v1+v2+v3)  || item),
              data = adu)
summary(madu)
#confint(madu)
rm(madu, adu)


#model for adults for noun region 
madun <- lmer(pup_prop ~ (v1+v2+v3)  +
             (1 +  (v1+v3) || subject),
            # (1| item),
           data = adun)
summary(madun)
#confint(madun)
rm(madun, adun)


#model for adults for postview region 
madup <- lmer(pup_prop ~ (v1+v2+v3)  +
             (1 +  (v1+v3) || subject)+
             (1 + (v3) || item),
           data = adup)
summary(madup)
#confint(madun)
rm(madup, adup)




###
### CHILDREN
###

#create subsets with child data only 
kid <- subset(d, group == "Children")  #children
kidn <- subset(kid, position == "noun")     #children, noun region
kidp <- subset(kid, position == "post")     #children, postview region

#model for children with all factors: pupil children ~ condition * region
kidm <- lmer(pup_prop ~ (v1+v2+v3) * (t1+t2+t3) +
               (1 + (v1+v2+v3)  || subject)+
               (1 + (v1+v2+v3)   || item),
             data = kid)
summary(kidm)
#confint(kidm)
rm(kidm, kid)


#model for children for noun region 
mkidn <- lmer(pup_prop ~ (v1+v2+v3) +
               (1 + (v1+v3) || subject),
              # (1 | item),
             data = kidn)
summary(mkidn)
#confint(mkidn)
rm(mkidn, kidn)


#model for children for postview region 
mkidp <- lmer(pup_prop ~ (v1+v2+v3) +
               (1 + (v1+v3)  || subject)+
               (1 | item),
                data = kidp)
summary(mkidp)
#confint(mkidp)
rm(mkidp, kidp)




############################################
### PLOT DATA ACROSS THE SENTENCE COURSE ###
############################################

#we generate variable "pos" which contains the information of the variable "position"
#this is to prevent to get confused with ggplot functions names "position"...
d <- mutate(d, pos = ifelse(position == "verb", "Verb",
                     ifelse(position == "gleich", "Spill-over", 
                     ifelse(position == "noun", "Noun", "Postview"))))

plot <- summarySE(d, measurevar="pup_prop", groupvars=c("condition", "pos", "group"), na.rm=T)

#the next two functions allow a renaming of the facet labels in the plot
labels_new <- c("Verb
(eats)", "Spill-over
(soon)", "Noun
(waffle)", "Postview")

names(labels_new) <- c("Verb", "Spill-over", "Noun", "Postview")

ggplot(plot, aes(condition, pup_prop, fill = condition)) +
  geom_col(position = position_dodge(width = .8), width = .8) +
  facet_grid(group~pos, labeller = labeller(pos = labels_new)) +
  guides(fill = FALSE) +
  geom_errorbar(aes(ymin=pup_prop-se, ymax=pup_prop+se), position = position_dodge(.8), width = .15) + 
  theme_bw()+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())+ 
  theme(strip.background = element_rect(color="black", fill="white", size=0.5, linetype="solid"))+ 
  theme(strip.text.x = element_text(size = 10, color = "black"))+ 
  theme(strip.text.y = element_text(size = 10, color = "black"))+ 
  xlab("Visual Condition") +
  ylab("Averaged Proportional Pupil Size")+
  theme(axis.title.x = element_text(size = 10))+
  theme(axis.title.y = element_text(size = 10))+
  theme(axis.text.x = element_text(size =10))+
  theme(axis.text.y = element_text(size =10)) +
  scale_fill_grey() +
  theme(legend.position = "none")+
  scale_y_continuous(labels = function(x) paste0(x*1, "%")) 

rm(plot, labels_new)




######################################
### DESCRIPTIVE SAMPLE INFORMATION ###
######################################

#N = 61, n = 24 children, n = 37 adults
count(d, subject)

#change format 
d$age <- as.numeric(d$age)
d$ppvt <- as.numeric(d$ppvt)
d$svft <- as.numeric(d$svft)
d$ppvt <- as.numeric(d$ppvt)

#calculate descriptive values
summarise(group_by(d, group), agemean=mean(age, na.rm = T), agesd=sd(age, na.rm = T), agerange = range(age, na.rm = T))
summarise(group_by(d, group), agemean=mean(cnt, na.rm = T), agesd=sd(cnt, na.rm = T), agerange = range(cnt, na.rm = T))
summarise(group_by(d, group), agemean=mean(ppvt, na.rm = T), agesd=sd(ppvt, na.rm = T), agerange = range(ppvt, na.rm = T))
summarise(group_by(d, group), agemean=mean(svft, na.rm = T), agesd=sd(svft, na.rm = T), agerange = range(svft, na.rm = T))